Chi-Yun Wu, Zhang Lab, University of Pennsylvania
Alleloscope is able to profile allele-specific copy number alterations (DNA-level information) for each cell in the scATAC-seq data (typically used to detect chromatin accessibility). This facilitates 1. integration of allele-specific copy number alterations and chromatin accessibility for individual cells and 2. more reliably detection of copy number events with allelic imbalance.
For more information about the method, please check out the github and the paper.
The following are the input files for different steps.
library(Alleloscope) # load the library
setwd("~/Alleloscope/") # set path to the github folder
dir_path <- "./samples/SU008/scATAC/output/"; dir.create(dir_path) # set up output directory
size=read.table("data-raw/sizes.cellranger-atac-hg19-1.2.0.txt", stringsAsFactors = F) # read size file
size=size[1:22,]
# SNP by cell matrices for ref and alt alleles
barcodes=read.table("data-raw/SU008/scATAC/barcodes.tsv", sep='\t', stringsAsFactors = F, header=F)
alt_all=readMM("data-raw/SU008/scATAC/alt_all.mtx")
ref_all=readMM("data-raw/SU008/scATAC/ref_all.mtx")
var_all=read.table("data-raw/SU008/scATAC/var_all.vcf", header = F, sep='\t', stringsAsFactors = F)
# bin by cell matrices for tumor and normal for segmentation
raw_counts=read.table('data-raw/SU008/scATAC/chr200k_fragments_sub.txt', sep='\t', header=T, row.names = 1,stringsAsFactors = F)
colnames(raw_counts)=gsub("[.]","-", colnames(raw_counts))
cell_type=readRDS('data-raw/SU008/scATAC/cell_type_from_peaks.rds')
clust_order=plot_scATAC_cnv(raw_mat = raw_counts , cell_type = cell_type, size = size, plot_path = paste0(dir_path,"/cov_cna_plot2.pdf"))
Heatmap across chromosomes with example regions shown.
Obj=Createobj(alt_all =alt_all, ref_all = ref_all, var_all = var_all ,samplename='Sample', genome_assembly="GRCh37", dir_path=dir_path, barcodes=barcodes, size=size, assay='scATACseq')
Obj_filtered=Matrix_filter(Obj=Obj, cell_filter=5, SNP_filter=5, min_vaf = 0.1, max_vaf = 0.9)
# suggest setting min_vaf=0.1 and max_vaf=0.9 when SNPs are called in the tumor sample for higher confident SNPs
Obj_filtered$seg_table=readRDS("./data-raw/SU008/scATAC/seg_table_WES.rds")
Obj_filtered=Segments_filter(Obj_filtered=Obj_filtered, nSNP=500)
Obj_filtered=Est_regions(Obj_filtered = Obj_filtered, max_nSNP = 30000, plot_stat = T,cont = FALSE)
# Recommend max_nSNP <50000
# Regions without allelic imbalence do not coverge (Reach the max number of iterations.)
Obj_filtered$ref=Obj_filtered$seg_table_filtered$chrr[7] # choose one normal region
Obj_filtered$select_normal$barcode_normal=cell_type[which(cell_type[,2]!='tumor'),1]
Obj_filtered=Genotype_value(Obj_filtered = Obj_filtered, type='tumor', raw_counts=raw_counts, cov_adj=1) # for tumor
Obj_filtered=Genotype(Obj_filtered = Obj_filtered, cell_type=cell_type, xmax=3)
tmp=Select_normal(Obj_filtered = Obj_filtered, raw_counts=raw_counts, plot_theta = TRUE, cell_type = cell_type, mincell = 0)
rm(tmp)
The output clustering result for the example regions is shown below.
umap_peak=readRDS("./data-raw/SU008/scATAC/peak_umap_tumor.rds")
theta_hat_chr4=Obj_filtered$rds_list$`chr4:0`$theta_hat
theta_hat_chr4=theta_hat_chr4[match(rownames(umap_peak), names(theta_hat_chr4))]
umap_peak$theta_hat=theta_hat_chr4
The two signals can be visuzlized simultaneously for each cell in the scATAC-seq data.
library(ggplot2)
library(RColorBrewer)
# UMAP
pp=ggplot(umap_peak,aes(x = UMAP1, y=UMAP2)) +
geom_point(size=1,alpha=0.5, aes(color=(theta_hat))) +
scale_color_gradientn(colors = colorRampPalette(rev(brewer.pal(n = 7, name ="RdYlBu")))(100))+
theme_bw()
print(pp)
# density plot
pd <-ggplot(umap_peak, aes(x=theta_hat, color=peak_group)) +
geom_density()+
scale_color_manual(values = c("peak2" = "#F8766D","peak1" = "#00BFC4")) +
theme_bw()
print(pd)
Wu, C.-Y. et al. Alleloscope: Integrative analysis of single cell haplotype-divergent copy number alterations and chromatin accessibility changes reveals novel clonal architecture of cancers. bioRxiv (2020): https://doi.org/10.1101/2020.10.23.349407